Author: Dmitry Kobak & Lan Huong Nguyen
import os, sys, mkl, time, pickle
# Limit the number of threads numpy/scipy are using.
# These settings are applied before numpy is imported below; presumably the
# threading backends only honour them when set before they load -- confirm.
nthreads = 15  # also passed as nthreads= to diffusion_tsne further down
os.environ["OMP_NUM_THREADS"] = str(nthreads)
mkl.set_num_threads(nthreads)
import numpy as np
import multiprocessing as mp
# plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
# Global matplotlib font sizes: 24 for figure titles, 20 for axes titles,
# axis labels and tick labels.
mpl.rcParams.update({
    'figure.titlesize': 24,
    'axes.titlesize': 20,
    'axes.labelsize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
})
# Custom pyscripts
# Root of the DiffusionTSNE project. Defaults to the original author's
# checkout; set the DIFFUSION_TSNE_DIR environment variable to point at a
# different location when running on another machine.
maindir = os.environ.get(
    'DIFFUSION_TSNE_DIR',
    '/home/lanhuong/Projects/ManifoldLearning/DiffusionTSNE')
# Work from the project root, so that relative file names used later (such as
# 'betas.dat') resolve against it, and put it first on the import path for the
# project modules imported right after this.
os.chdir(maindir)
sys.path.insert(0, maindir)
from diffusion_tsne import diffusion_tsne
from plotting import *
from generate_data import *
from utils import *
from metrics import *
%load_ext autoreload
%autoreload 2
# Palette of ten colours (hex RGB strings), listed two per line.
col = np.array([
    '#a6cee3', '#1f78b4',
    '#b2df8a', '#33a02c',
    '#fb9a99', '#e31a1c',
    '#fdbf6f', '#ff7f00',
    '#cab2d6', '#6a3d9a',
])
# Machine epsilon of double-precision floats.
MACHINE_EPSILON = np.finfo(np.float64).eps
# Load MNIST data
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten each image into a 784-long float64 row and divide by 255
# (presumably mapping 0-255 pixel intensities onto [0, 1] -- confirm).
x_train = x_train.reshape((60000, 784)).astype(np.float64) / 255
x_test = x_test.reshape((10000, 784)).astype(np.float64) / 255
# Stack the two splits into one data set, training rows first.
X = np.concatenate([x_train, x_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)
print(X.shape)
# Do PCA and keep 50 dimensions
# Centre every column of X at zero mean before the decomposition.
X = X - X.mean(axis=0)
# Reduced SVD (full_matrices=False): X = U @ diag(s) @ V.
U, s, V = np.linalg.svd(X, full_matrices=False)
# Projection onto the first 50 components. Scaling the leading columns of U by
# their singular values yields the same values as
# np.dot(U, np.diag(s))[:, :50], but without materialising the dense diagonal
# matrix or computing the columns that are discarded anyway.
X50 = U[:, :50] * s[:50]
# We will use PCA initialization later on
# First two components, rescaled so the first one has standard deviation 0.0001.
PCAinit = X50[:, :2] / np.std(X50[:, 0]) * 0.0001
# Running t-SNE on the full MNIST
# Run with scale_probs=False and no late-exaggeration arguments.
start = time.time()
Z50_0 = diffusion_tsne(
    X50,
    perplexity=100,
    seed=42,
    scale_probs=False,
    nthreads=nthreads,
    load_affinities="save",
)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Embedding with the label array y passed as `label`.
plot2D(Z50_0, label=y, s=3, figsize=(10, 10))
plt.legend(fontsize=14, markerscale=6)

# Read the double-precision arrays from the .dat files in the working
# directory (presumably written by the run above because of
# load_affinities="save" -- confirm against diffusion_tsne).
betas0 = np.fromfile('betas.dat', dtype=np.float64)
degrees0 = np.fromfile('affinity_rowsums.dat', dtype=np.float64)
meandist0 = np.fromfile('mean_dists.dat', dtype=np.float64)

# Same embedding with 1/(2*beta) as `label`; if beta is the Gaussian kernel
# precision this would be the per-point sigma^2 -- TODO confirm.
plot2D(Z50_0, label=1.0 / (2.0 * betas0), s=25, figsize=(12, 10))
plt.axis('equal')
# Running t-SNE on the full MNIST, this time with late exaggeration
# (late_exag_coeff=2, start_late_exag_iter=800); scale_probs stays False.
start = time.time()
Z50 = diffusion_tsne(
    X50,
    perplexity=100,
    seed=42,
    scale_probs=False,
    late_exag_coeff=2,
    start_late_exag_iter=800,
    nthreads=nthreads,
    load_affinities="save",
)
end = time.time()
print('t-SNE embedding in %f sec' % (end - start))

# Embedding with the label array y passed as `label`.
plot2D(Z50, label=y, s=3, figsize=(10, 10))
plt.legend(fontsize=14, markerscale=6)

# Read the double-precision arrays from the .dat files in the working
# directory (presumably written by the run above because of
# load_affinities="save" -- confirm against diffusion_tsne).
betas = np.fromfile('betas.dat', dtype=np.float64)
degrees = np.fromfile('affinity_rowsums.dat', dtype=np.float64)
meandist = np.fromfile('mean_dists.dat', dtype=np.float64)

# Same embedding with 1/(2*beta) as `label`; if beta is the Gaussian kernel
# precision this would be the per-point sigma^2 -- TODO confirm.
plot2D(Z50, label=1.0 / (2.0 * betas), s=25, figsize=(12, 10))
plt.axis('equal')
# t-SNE on the full MNIST with scale_probs=True and no late-exaggeration
# arguments.
# NOTE(review): this run uses seed=123 while the other three runs use seed=42
# -- confirm that the difference is intended.
start = time.time()
Z50scale_0 = diffusion_tsne(
    X50,
    perplexity=100,
    seed=123,
    scale_probs=True,
    nthreads=nthreads,
    load_affinities="save",
)
end = time.time()
print('Scaled t-SNE embedding in %f sec' % (end - start))

# Embedding with the label array y passed as `label`.
plot2D(Z50scale_0, label=y, s=3, figsize=(10, 10))
plt.legend(fontsize=14, markerscale=6)

# Read the double-precision arrays from the .dat files in the working
# directory (presumably written by the run above because of
# load_affinities="save" -- confirm against diffusion_tsne).
betas2_0 = np.fromfile('betas.dat', dtype=np.float64)
degrees2_0 = np.fromfile('affinity_rowsums.dat', dtype=np.float64)
meandist2_0 = np.fromfile('mean_dists.dat', dtype=np.float64)

# Same embedding with 1/(2*beta) as `label`; if beta is the Gaussian kernel
# precision this would be the per-point sigma^2 -- TODO confirm.
plot2D(Z50scale_0, label=1.0 / (2.0 * betas2_0), s=25, figsize=(12, 10))
plt.axis('equal')
# t-SNE on the full MNIST with scale_probs=True, early exaggeration stopped at
# iteration 300 (stop_early_exag_iter) and late exaggeration with coefficient 2
# from iteration 800 (late_exag_coeff, start_late_exag_iter).
start = time.time()
Z50scale = diffusion_tsne(X50, perplexity=100, seed=42, scale_probs = True,
                          stop_early_exag_iter=300,
                          late_exag_coeff=2, start_late_exag_iter=800,
                          nthreads=nthreads, load_affinities="save")
end = time.time()
# Report this as a scaled run, matching the message printed by the other
# scale_probs=True run, so the timing lines in the output can be told apart.
print('Scaled t-SNE embedding in %f sec' %(end-start))
# Embedding with the label array y passed as `label`.
plot2D(Z50scale, label=y, s=3, figsize=(10,10))
plt.legend(fontsize=14, markerscale=6)
# Read the double-precision arrays from the .dat files in the working
# directory (presumably written by the run above because of
# load_affinities="save" -- confirm against diffusion_tsne).
betas2 = np.fromfile('betas.dat', dtype=np.dtype('d'))
degrees2 = np.fromfile('affinity_rowsums.dat', dtype=np.dtype('d'))
meandist2 = np.fromfile('mean_dists.dat', dtype=np.dtype('d'))
# Same embedding with 1/(2*beta) as `label`; if beta is the Gaussian kernel
# precision this would be the per-point sigma^2 -- TODO confirm.
plot2D(Z50scale, label= 1/(2*betas2), s=25, figsize=(12, 10))
plt.axis('equal')
# NOTE(review): these statements repeat, verbatim, the plotting and file
# loading that directly precede them in the file -- confirm whether the
# second copy is still wanted.
# Embedding Z50scale with the label array y passed as `label`.
plot2D(Z50scale, label=y, s=3, figsize=(10, 10))
plt.legend(fontsize=14, markerscale=6)

# Re-read the double-precision arrays from the .dat files in the working
# directory.
betas2 = np.fromfile('betas.dat', dtype=np.float64)
degrees2 = np.fromfile('affinity_rowsums.dat', dtype=np.float64)
meandist2 = np.fromfile('mean_dists.dat', dtype=np.float64)

# Same embedding with 1/(2*beta) as `label`.
plot2D(Z50scale, label=1.0 / (2.0 * betas2), s=25, figsize=(12, 10))
plt.axis('equal')